library(ggplot2)
library(texreg)

##
##LOAD DATA
##

## Merged NPI / DIME / signatory file; feeds Tables 1-3 and Figures 1 and 3.
npi.signatories <- read.csv(file = 'npi_dime_signatories_merged.csv')

## NPI records merged with the Florida voter file; feeds Figure 2.
npi.flvf <- read.csv(file = 'npi_flvf.csv')


##
##REGRESSIONS
##

##TABLE 1
## Logistic regressions of `signatory` on `cfscore.static`:
##   t1.m1 - bivariate model on all rows glm() can use;
##   t1.m2 - the same bivariate model, restricted to rows where every
##           covariate of t1.m3 is observed;
##   t1.m3 - adds female, zip3.cfscore, gradyear and sn.
## x = TRUE keeps the model matrix in each fit.
t1.m1 <- glm(signatory ~ cfscore.static,
             data = npi.signatories,
             family = binomial(),
             x = TRUE)

## The restriction lists all four t1.m3 covariates, exactly as the
## corresponding model in Table 2 does. Checking only gradyear and
## zip3.cfscore would let rows with missing female/sn into this model even
## though glm() drops them from t1.m3, so the two columns of the table would
## not share a sample. This is a no-op when female and sn have no missing
## values on the rows otherwise kept.
t1.m2 <- glm(signatory ~ cfscore.static,
             data = npi.signatories,
             family = binomial(),
             subset = (!is.na(zip3.cfscore) & !is.na(gradyear) &
                       !is.na(female) & !is.na(sn)),
             x = TRUE)

t1.m3 <- glm(signatory ~ cfscore.static + female + zip3.cfscore + gradyear + sn,
             data = npi.signatories,
             family = binomial(),
             x = TRUE)

## Console preview; coefficients whose names match 'sn' are hidden.
screenreg(list(t1.m1, t1.m2, t1.m3), omit = 'sn', digits = 5)

## Standalone HTML document (full doctype/html/head/body tags, no inline CSS)
## written with a .doc file name; significance stars are switched off.
htmlreg(list(t1.m1, t1.m2, t1.m3),
        omit = 'sn',
        stars = numeric(0),
        digits = 3,
        doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE,
        center = TRUE, inline.css = FALSE,
        file = 'table_1.doc')


##TABLE 2
## Logistic regressions of `signatory` on `gave.in.2016`. The second and third
## models are derived from the first with update(), which re-issues the same
## glm() call with the stated change.
t2.m1 <- glm(
  formula = signatory ~ gave.in.2016,
  family = binomial(),
  data = npi.signatories,
  x = TRUE
)

## Same specification, refit only on rows where all four controls are observed.
t2.m2 <- update(
  t2.m1,
  subset = (!is.na(zip3.cfscore) & !is.na(gradyear) & !is.na(female) & !is.na(sn))
)

## Bivariate specification plus the controls.
t2.m3 <- update(t2.m1, . ~ . + female + zip3.cfscore + gradyear + sn)

## Console preview, with coefficients matching 'sn' left out.
screenreg(
  list(t2.m1, t2.m2, t2.m3),
  omit = 'sn',
  digits = 5
)

## Full HTML document saved under a .doc name; no significance stars.
htmlreg(
  list(t2.m1, t2.m2, t2.m3),
  omit = 'sn',
  stars = numeric(0),
  digits = 3,
  doctype = TRUE,
  html.tag = TRUE,
  head.tag = TRUE,
  body.tag = TRUE,
  center = TRUE,
  inline.css = FALSE,
  file = 'table_2.doc'
)




##TABLE 3
## Logistic regressions of the gave.within.{3,6,12}.months outcomes on
## `signatory`, `gave.in.2016` and the controls.
t3.m1 <- glm(
  formula = gave.within.3.months ~ signatory + gave.in.2016 + gradyear + female + zip3.cfscore + sn,
  family = binomial(),
  data = npi.signatories
)

## Six-month outcome with the right-hand side of t3.m1 unchanged.
t3.m2 <- update(t3.m1, gave.within.6.months ~ .)

## Twelve-month outcome; the formula is spelled out because its terms are
## listed in a different order from t3.m1.
t3.m3 <- update(
  t3.m1,
  gave.within.12.months ~ signatory + gave.in.2016 + female + zip3.cfscore + gradyear + sn
)

## Console preview, with coefficients matching 'sn' left out.
screenreg(
  list(t3.m1, t3.m2, t3.m3),
  omit = 'sn',
  digits = 5
)

## Full HTML document saved under a .doc name; no significance stars.
htmlreg(
  list(t3.m1, t3.m2, t3.m3),
  omit = 'sn',
  stars = numeric(0),
  digits = 3,
  doctype = TRUE,
  html.tag = TRUE,
  head.tag = TRUE,
  body.tag = TRUE,
  center = TRUE,
  inline.css = FALSE,
  file = 'table_3.doc'
)


##
##FIGURE 1: CFSCORE BY COHORT
##

##Subset dataset
## Adds a character F/M column to the global data frame. Rows with a missing
## `female` value get NA here, so they enter only the 'All' series below.
npi.signatories$gender <- ifelse(npi.signatories$female == 1, 'F', 'M')

## One row per physician: keep the first record for each npi.
fig1.md <- npi.signatories[!duplicated(npi.signatories$npi), ]

## Number of non-missing p.to.rep values per cohort x gender cell
## (kept for inspection; the plot does not use it).
zz1 <- with(fig1.md,
            aggregate(p.to.rep, list(gradyear, gender),
                      function(z) sum(!is.na(z))))

## Mean p.to.rep by cohort x gender, and by cohort alone.
zz <- with(fig1.md,
           aggregate(p.to.rep, list(gradyear, gender), mean, na.rm = TRUE))
zz2 <- with(fig1.md,
            aggregate(p.to.rep, list(gradyear), mean, na.rm = TRUE))
## The 'cfscore' column holds the mean of p.to.rep, not a CFscore.
colnames(zz) <- c('gradyear', 'gender', 'cfscore')

##Recode variables to correct format
## Append the pooled series as a data frame so the numeric columns are never
## pushed through a character matrix and parsed back.
zz$gender <- as.character(zz$gender)
zz <- rbind(zz,
            data.frame(gradyear = zz2[, 1],
                       gender = rep('All', nrow(zz2)),
                       cfscore = zz2[, 2],
                       stringsAsFactors = FALSE))
zz$gender[zz$gender == 'M'] <- 'Male'
zz$gender[zz$gender == 'F'] <- 'Female'
zz$cfscore <- as.numeric(as.character(zz$cfscore))
zz$gradyear <- as.numeric(as.character(zz$gradyear))

##Create ggplot plot
## ggplot() + geom_point() is what the deprecated qplot() call built.
q <- ggplot(data = zz,
            aes(x = gradyear, y = cfscore,
                shape = gender, linetype = gender, colour = gender)) +
    geom_point() + theme_bw() + geom_line()
q <- q + xlab('Graduation Year')
q <- q + ylab('Contributions to Republicans, %')
q <- q + xlim(1960, 2012)
q <- q + scale_y_continuous(limits = c(0, 1), expand = c(0, 0), labels = scales::percent)
## All three scales share the empty title.
q <- q + scale_color_grey('')
q <- q + scale_linetype_manual('', values = c('Male' = 4, 'Female' = 2, 'All' = 1))
q <- q + scale_shape('')
## Assign the result: a bare `q + guides(...)` discards the guides and, when
## the script is run with Rscript, auto-prints an extra copy of the plot.
q <- q + guides(colour = "legend", linetype = "legend", shape = "legend")
q <- q + theme(legend.position = c(.9, .9),
               panel.grid.minor = element_blank(),
               axis.text = element_text(size = 14),
               legend.text = element_text(size = 20),
               axis.title = element_text(size = 20)
               )
print(q)

##Export eps
setEPS()
postscript(file = 'figure_1.eps', width = 10, height = 7.5)
print(q)
dev.off()



###############################################################################
##FIGURE 2: FLORIDA PARTY REG TRENDS
###############################################################################

## load('/workspace/md_participation/data/hpi_fl_panel.rda')

## gy.out <- with(hpi.fl,lm(gradyear~byear))
## predgy <- round(with(hpi.fl,predict(gy.out,data.frame(byear=byear))))
## hpi.fl$gradyear[is.na(hpi.fl$gradyear)] <- predgy[is.na(hpi.fl$gradyear)]


## Share of records with party 'REP' / 'DEM' within each graduation year.
## The logical comparison is averaged, so NA party values are dropped by
## na.rm = TRUE rather than counted as non-members.
aa.md.rep <- aggregate(npi.flvf$party == 'REP', list(npi.flvf$gradyear), mean, na.rm = TRUE)
aa.md.dem <- aggregate(npi.flvf$party == 'DEM', list(npi.flvf$gradyear), mean, na.rm = TRUE)

## Stack both series in long format: year, share (`prep`), party label (`type`).
gg <- rbind(cbind(aa.md.dem, type = 'D'),
            cbind(aa.md.rep, type = 'R'))
colnames(gg) <- c('year', 'prep', 'type')
gg <- as.data.frame(gg)
gg$year <- as.numeric(as.character(gg$year))
gg$prep <- as.numeric(as.character(gg$prep))
gg <- gg[gg$year %in% 1960:2015, ]

## ggplot() + geom_point() is what the deprecated qplot() call built.
q1 <- ggplot(data = gg,
             aes(x = year, y = prep,
                 colour = type, shape = type, linetype = type)) +
    geom_point()
q1 <- q1 + geom_line()

## The parties differ by colour only; line type and point shape are identical.
q1 <- q1 + scale_colour_manual('', values = c('R' = 'red',
                                              'D' = 'blue'))
q1 <- q1 + scale_linetype_manual('', values = c('R' = 1,
                                                'D' = 1))
q1 <- q1 + scale_shape_manual('', values = c('R' = 17,
                                             'D' = 17))
q1 <- q1 + scale_y_continuous(limits = c(0, .65), expand = c(0, 0), labels = scales::percent)

## The x limits are narrower than the 1960:2015 filter above, so cohorts
## after 2012 are not drawn.
q1 <- q1 + scale_x_continuous('Graduation Year', breaks = seq(1960, 2010, 10), limits = c(1960, 2012))
q1 <- q1 + ylab('Registering as Republicans/Democrats, %')
q1 <- q1 + theme_bw()
q1 <- q1 + theme(legend.position = c(.9, .9),
                 panel.grid.minor = element_blank(),
                 axis.text = element_text(size = 14),
                 legend.text = element_text(size = 20),
                 axis.title = element_text(size = 20)
                 )

##dev.off()
setEPS()
postscript(file = 'figure_2.eps', width = 11, height = 7)
print(q1)
dev.off()


###############################################################################
##FIGURE 3: DISTRIBUTIONS
###############################################################################
## One row per record with an observed signatory flag: static score (`ip`),
## signing status (`type`) and `cfscore` (`cf2016`). Every column is taken
## from the same row; previously the 'Did Not Sign' rows took `cf2016` from
## the signers (signatory==1), so signers' values were recycled onto
## non-signers and complete.cases() dropped the wrong rows.
## NOTE(review): assumes npi.signatories has a column named exactly `cfscore`
## (`$` would otherwise partial-match another column) - confirm.
fig3.flag <- npi.signatories$signatory
fig3.keep <- fig3.flag %in% c(0, 1)
gg <- data.frame(ip = npi.signatories$cfscore.static[fig3.keep],
                 type = ifelse(fig3.flag[fig3.keep] == 1, 'Signed', 'Did Not Sign'),
                 cf2016 = npi.signatories$cfscore[fig3.keep],
                 stringsAsFactors = FALSE)
gg$ip <- as.numeric(as.character(gg$ip))
## Pull scores beyond +/-2.15 back to the boundary so they land in the end bins.
gg$ip <- pmax(pmin(gg$ip, 2.15), -2.15)
gg <- gg[complete.cases(gg), ]
## Each row is weighted by 1/(group size), so bar heights are within-group
## shares and each group's bars sum to one.
gg$weights <- ifelse(gg$type == 'Signed',
                     1 / sum(gg$type == 'Signed'),
                     1 / sum(gg$type == 'Did Not Sign'))

## Dodged, weighted histograms of the two groups. (An earlier faceted version
## of this plot was built and immediately overwritten; it has been removed.)
q <- ggplot(data = gg, aes(x = ip, fill = type, colour = 'black', group = type)) +
    geom_histogram(position = 'dodge', aes(weight = weights))
q <- q + scale_fill_manual('', values = c('Signed' = 'black', 'Did Not Sign' = 'darkgrey'))
## The colour aesthetic is the literal string 'black'; use it as-is.
q <- q + scale_colour_identity()
q <- q + theme_bw()
q <- q + xlab('Physician DIME Score (liberal/conservative)')
q <- q + ylab('Percentage')
## The y axis extends below zero to make room for the reference labels.
q <- q + scale_y_continuous(expand = c(0, 0), limits = c(-.03, .27), labels = scales::percent)

##Set theme
q <- q + theme(axis.text.y = element_text(size = 10, colour = 'black'),
               axis.text.x = element_text(size = 8, colour = 'black'),
               title = NULL,
               legend.position = c(.9, .9),
               strip.text.x = element_text(size = 14),
               strip.background = element_rect(fill = 'white'),
               plot.title = element_blank(),
               plot.margin = unit(c(.05, .25, 0.05, 0.25), "lines"),
               axis.title.y = element_text(size = 12, colour = 'black'),
               axis.title.x = element_text(size = 16, colour = 'black'))


##Price DIME score
q <- q + geom_vline(xintercept = 1.066, linetype = 2)
q <- q + annotate(geom = 'text', x = 1.066 + .01, y = .20, size = 5, label = 'Tom Price', hjust = .50)

##Add in DIME scores for reference politicians
ttext <- data.frame(x = c(-1.88, -1.21, -1.16, 0.754, 0.904, 1.29, 1.45),
                    label = c('Bernie Sanders', 'Hillary Clinton', 'Barack Obama',
                              'John McCain', 'Mitt Romney', 'Donald Trump',
                              'Ted Cruz'),
                    stringsAsFactors = FALSE)
## Odd-numbered entries are raised by .01 so neighbouring labels sit on
## alternating rows; each marker sits .005 above its label.
fig3.shift <- ifelse(seq_len(nrow(ttext)) %% 2 == 1, .01, 0)
q <- q + annotate(geom = 'text', x = ttext$x, y = (-.025) + fig3.shift,
                  label = ttext$label, size = 3, hjust = .5)
q <- q + annotate(geom = 'point', x = ttext$x, y = (-0.025) + fig3.shift + .005, size = 1)

print(q)

setEPS()
postscript(file = 'figure_3.eps',
           width = 11, height = 7)
print(q)
dev.off()



